# Scraper for 电影天堂 (dytt8.net) movie listings, written 2019-05-18.

import requests
from lxml import etree

base_url='http://dytt8.net'

def getInfo(listInfo, l=1, num=10):
    """Collect (href, title) pairs from the dytt8 listing pages.

    Appends one ``(link, title)`` tuple per movie to *listInfo*
    (mutated in place), covering listing pages *l* through *num*.
    Page *l* is always fetched at least once, matching the original
    recursive behavior even when ``l > num``.

    :param listInfo: list that receives (href, title) tuples
    :param l: first page number to fetch (1-based)
    :param num: last page number to fetch
    """
    page = l
    # Iterate instead of recursing: one stack frame regardless of how
    # many pages are requested.
    while True:
        url = 'http://dytt8.net/html/gndy/dyzz/list_23_' + str(page) + '.html'
        r = requests.get(url)
        # The site does not declare its encoding reliably; let requests
        # guess it from the body so Chinese titles decode correctly.
        r.encoding = r.apparent_encoding
        html = etree.HTML(r.text)
        for a in html.xpath('//table[@class="tbspan"]//a'):
            href = a.xpath('./@href')
            title = a.xpath('./text()')
            # Guard the [0] lookups: skip anchors missing either piece
            # rather than raising IndexError mid-scrape.
            if href and title:
                listInfo.append((href[0], title[0]))
        if page >= num:
            break
        page += 1


def print_dianying(listInfo):
    """Write every (href, title) pair in *listInfo* to collection/dianying.txt.

    The file is truncated and rewritten on each call, one
    ``href   title`` line per entry, UTF-8 encoded.

    :param listInfo: iterable of (href, title) tuples
    """
    # Mode 'w' already truncates, so the original open/close-then-append
    # dance is unnecessary; the context manager also handles closing.
    with open('collection/dianying.txt', 'w', encoding='utf-8') as f:
        for href, title in listInfo:
            f.write(href + '   ' + title + '\n')

def getDetail(listInfo, index=0):
    """Fetch and parse the detail page for one movie.

    :param listInfo: list of (href, title) tuples produced by getInfo
    :param index: which entry's detail page to fetch (default: first)
    :return: dict with keys 'name', 'type', 'time', 'controduct',
             'download'; fields missing from the page stay ''
    """
    url = base_url + listInfo[index][0]
    r = requests.get(url)
    # Let requests guess the encoding so Chinese text decodes correctly.
    r.encoding = r.apparent_encoding
    html = etree.HTML(r.text)
    detail = {
        'name': '',
        'type': '',
        'time': '',
        'controduct': '',
        'download': '',
    }
    zoom = html.xpath('//div[@id="Zoom"]')
    if not zoom:
        # Layout changed (or error page): return the empty skeleton
        # instead of raising IndexError.
        return detail
    div = zoom[0]
    text = div.xpath('.//p/text()')

    def _at(seq, i):
        # Safe positional lookup: '' when the page has fewer text nodes
        # than the fixed layout the indices below assume.
        return seq[i] if i < len(seq) else ''

    # NOTE(review): these fixed offsets mirror the original scraper and
    # assume the 2019 page layout — verify against the live site.
    detail['name'] = _at(text, 0)
    detail['type'] = _at(text, 6)
    detail['time'] = _at(text, 9)
    detail['controduct'] = _at(text, 30)
    detail['download'] = _at(div.xpath('.//a/@href'), 0)
    return detail

def print_detail(detail):
    """Write one movie's detail dict to collection/dianying_detail.txt.

    The file is truncated and rewritten on each call, one
    ``key  value`` line per entry, UTF-8 encoded.

    :param detail: dict of str -> str as returned by getDetail
    """
    # Mode 'w' truncates by itself, so no separate open/close pass is
    # needed; the context manager closes the file on exit.
    with open('collection/dianying_detail.txt', 'w', encoding='utf-8') as f:
        for key, value in detail.items():
            f.write(key + "  " + value + "\n")

if __name__ == '__main__':
    # Gather (href, title) pairs from the listing pages.
    movies = []
    # num = how many listing pages to fetch; l = page to start from.
    getInfo(movies, num=1)
    # index selects which movie's detail page (name, download link, ...)
    # to fetch and dump.
    info = getDetail(movies)
    print_detail(info)
